# 通过Xpath来筛选数据
# 通过Xpath筛选出百度首页的热搜
import requests
from lxml import etree
from fake_useragent import FakeUserAgent

# Build the request headers (with a randomized User-Agent) and send the request.
headers = {'User-Agent': FakeUserAgent().random}
url = 'https://www.baidu.com'
# Always pass a timeout: without one, requests waits indefinitely on a
# stalled connection and the script never finishes.
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of silently parsing an error
# page and printing an empty list.
response.raise_for_status()

# When the Content-Type header carries no charset, requests falls back to
# ISO-8859-1 for text/* responses, which garbles Chinese text. In that case
# use the encoding detected from the body instead.
if 'charset' not in response.headers.get('Content-Type', '').lower():
    response.encoding = response.apparent_encoding

# Convert the HTML text into an element tree.
tree = etree.HTML(response.text)

# Select the text of every <span class="title-content-title"> that sits
# directly under an <a> inside an <li> (the hot-search titles), then print it.
data = tree.xpath('//li/a/span[@class="title-content-title"]/text()')
print(data)
